Introduction

Reads were first de-multiplexed with ea-utils (fastq-multx) with an allowed barcode mismatch of 1 (Default). De-multiplexed reads were then subjected to an alignment against the UniVec vector contamination database using the adapter removal tool within ea-utils (fastq-mcf). Barcodes were extracted by remove the last 12 nucleotides from each read.

Next reads were subjected to an alignment against the Oligo library using a DNA aligner. We used Bowtie2…

Data import & preprocessing

We are going to first import the merged counts tables that is outputted from the Nextflow pipeline. This table already has the raw counts summarised from the alignments and merged into a single matrix.

Import data

# Import counts
counts = readRDS('../data/CD8-counts.rda')
head(counts)
##         cdk453_a cdk453_b d1_10_a d1_10_b d1_100_a d1_100_b d1_1000_a d1_1000_b
## oligo_1     1551     1612    2369    2561     2569     2559      4852      2483
## oligo_2     4038     4485    7276    6028     7360     6630     10887      6771
## oligo_3     1898     2014    2751    2228     3160     2711      4475      2407
## oligo_4     2351     2285    3168    3149     3758     3361      5825      3646
## oligo_5     3251     3546    4672    4928     5789     5114      8571      4926
## oligo_6     2132     2011    3146    2560     3108     2734      4388      2809
##         d1_300_a d1_300_b id3_a id3_b mock_a mock_b
## oligo_1     2953     2841  2709  2110   4105   3251
## oligo_2     7919     8438  7396  6572  12822   8352
## oligo_3     3062     3386  3042  2084   4337   3272
## oligo_4     3659     4049  3770  3303   5496   3992
## oligo_5     5662     6434  5276  4044   7890   5831
## oligo_6     3122     3320  3382  2682   5531   3180
# Import metadata
metadata = readRDS('../data/CD8-metadata.rda')
head(metadata)
##            Sample Sample name Group Index barcode
## mock_a     mock_a      mock A  Mock       GGAATTC
## mock_b     mock_b      mock B  Mock       TTAGTGG
## cdk453_a cdk453_a       #53 A  CDK4       ACAACGA
## cdk453_b cdk453_b       #53 B  CDK4       ACCAATG
## id3_a       id3_a       1D3 A   ID3       CAACAGC
## id3_b       id3_b       1D3 B   ID3       CACACGT
# Import MultiQC results
report = readRDS('../data/CD8-multiqc.rda')
head(report)
## # A tibble: 6 × 47
##   Sample   raw_total_seque… filtered_sequen… sequences is_sorted `1st_fragments`
##   <chr>               <dbl>            <dbl>     <dbl>     <dbl>           <dbl>
## 1 cdk453_…          8434046                0   8434046         1         8434046
## 2 cdk453_…          8685727                0   8685727         1         8685727
## 3 d1_1000…         21558627                0  21558627         1        21558627
## 4 d1_1000…         12555470                0  12555470         1        12555470
## 5 d1_100_…         14081247                0  14081247         1        14081247
## 6 d1_100_…         12753978                0  12753978         1        12753978
## # … with 41 more variables: last_fragments <dbl>, reads_mapped <dbl>,
## #   reads_mapped_and_paired <dbl>, reads_unmapped <dbl>,
## #   reads_properly_paired <dbl>, reads_paired <dbl>, reads_duplicated <dbl>,
## #   reads_MQ0 <dbl>, reads_QC_failed <dbl>, `non-primary_alignments` <dbl>,
## #   total_length <dbl>, total_first_fragment_length <dbl>,
## #   total_last_fragment_length <dbl>, bases_mapped <dbl>,
## #   `bases_mapped_(cigar)` <dbl>, bases_trimmed <dbl>, …
# Import oligo annotations
annotations = readRDS('../data/CD8-annotations.rda') 
head(annotations)
## # A tibble: 6 × 3
##   oligo_id gene_name gene_id    
##   <chr>    <chr>     <chr>      
## 1 oligo_1  MAGEA1    MAGEA1 #1  
## 2 oligo_2  gene1394  gene1394 #1
## 3 oligo_3  gene2221  gene2221 #1
## 4 oligo_4  gene1715  gene1715 #1
## 5 oligo_5  gene1014  gene1014 #1
## 6 oligo_6  gene500   gene500 #1

Mapping quality

# Mean
mean(report$raw_total_sequences)
## [1] 14162045
median(report$raw_total_sequences)
## [1] 13417612
range(report$raw_total_sequences)
## [1]  8434046 23626015

Number of mapped reads

p1 = report %>% 
  select(Sample, reads_unmapped, reads_mapped) %>% 
  mutate(Sample = str_replace(Sample, '-bamstats', '')) %>% 
  pivot_longer(cols = c("reads_unmapped", "reads_mapped"), names_to = "feature", values_to = "reads") %>% 
    mutate(feature = if_else(feature == "reads_unmapped", "Unmapped", feature)) %>% 
  mutate(feature = if_else(feature == "reads_mapped", "Mapped", feature)) %>% 
  ggplot(aes(x = reorder(Sample, reads), y = reads, fill = feature)) +
  geom_col() +
  coord_flip() +
  theme_classic(base_size = 10) +
  theme(legend.position = "top") +
  scale_fill_brewer(palette = "Set2") +
  xlab("") +
  ylab("Number of reads") +
  ggtitle("CD8: mapping rate")
p1

### Percent of mapped reads
p2 = report %>% 
  select(Sample, reads_unmapped_percent, reads_mapped_percent ) %>% 
  mutate(Sample = str_replace(Sample, '-bamstats', '')) %>% 
  pivot_longer(cols = c("reads_unmapped_percent", "reads_mapped_percent"), names_to = "feature", values_to = "reads") %>% 
  mutate(reads = reads / 100) %>% 
  mutate(feature = if_else(feature == "reads_unmapped_percent", "Unmapped", feature)) %>% 
  mutate(feature = if_else(feature == "reads_mapped_percent", "Mapped", feature)) %>% 
  ggplot(aes(x = reorder(Sample, reads), y = reads, fill = feature)) +
  geom_col() +
  coord_flip() +
  theme_classic(base_size = 10) +
  theme(legend.position = "top") +
  scale_fill_brewer(palette = "Set2") +
  scale_y_continuous(labels = scales::percent) +
  xlab("") +
  ylab("Percent of reads")
p2

# Plot together
p = (p1 | p2) + plot_layout(guides = "collect") & theme(legend.position = "bottom")
p

ggsave("figures/Benchmark-CD8-mapping-qc.pdf", p, width = 8, height = 5)

Normalize data

# Match the sample ID's between samples
counts = counts[,match(metadata$Sample, colnames(counts))]

# Make DESeq2 object
ddsMat <- DESeqDataSetFromMatrix(countData = counts,
                                 colData = metadata,
                                 design = ~Group)

# Get normalize counts
ddsMat <- estimateSizeFactors(ddsMat)

# Filter flow counts
ddsMat.fil = ddsMat %>% 
  filter_low(percentile = 0.05)

# Get normalized counts 
counts.norm.fil <- (counts(ddsMat.fil, normalized = TRUE) + 0.5) %>% 
  as.data.frame() %>% 
  rownames_to_column("oligo_id")

# Create a long table 
counts.norm.fil.long = counts.norm.fil %>% 
  pivot_longer(cols = -oligo_id, names_to = "sampleId", values_to = "norm")

Mock replicates

counts.norm.fil %>% 
  ggplot(., aes(x = log10(mock_a), y = log10(mock_b))) +
  geom_point(colour = alpha("grey", 0.7)) +
  xlab("Mock replicate #1 (log10)") +
  ylab("Mock replicate #2 (log10)") +
  theme_light(base_size = 11) +
  stat_cor(method = "pearson", aes(label = ..r.label..)) 

1D3 replicates

counts.norm.fil %>% 
  ggplot(aes(x = log10(id3_a), y = log10(id3_b))) +
  geom_point(colour = alpha("grey", 0.7)) +
  xlab("1D3 replicate #1 (log10)") +
  ylab("1D3 replicate #2 (log10)") +
  theme_light(base_size = 11) +
  stat_cor(method = "pearson", aes(label = ..r.label..)) 

CDK4-53 replicates

counts.norm.fil %>% 
  ggplot(aes(x = log10(cdk453_a), y = log10(cdk453_b))) +
  geom_point(colour = alpha("grey", 0.7)) +
  xlab("CDK4/53 replicate #1 (log10)") +
  ylab("CDK4/53 replicate #2 (log10)") +
  theme_light(base_size = 11) +
  stat_cor(method = "pearson", aes(label = ..r.label..)) 

Dilution 1:10 replicates

counts.norm.fil %>% 
  ggplot(aes(x = log10(d1_10_a), y = log10(d1_10_b))) +
  geom_point(colour = alpha("grey", 0.7)) +
  xlab("1:10 replicate #1 (log10)") +
  ylab("1:10 replicate #2 (log10)") +
  theme_light(base_size = 11) +
  stat_cor(method = "pearson", aes(label = ..r.label..))

Dilution 1:100 replicates

counts.norm.fil %>% 
  ggplot(aes(x = log10(d1_100_a), y = log10(d1_100_b))) +
  geom_point(colour = alpha("grey", 0.7)) +
  xlab("1:100 replicate #1 (log10)") +
  ylab("1:100 replicate #2 (log10)") +
  theme_light(base_size = 11) +
  stat_cor(method = "pearson", aes(label = ..r.label..)) 

Dilution 1:300 replicates

counts.norm.fil %>% 
  ggplot(aes(x = log10(d1_300_a), y = log10(d1_300_b))) +
  geom_point(colour = alpha("grey", 0.7)) +
  xlab("1:300 replicate #1 (log10)") +
  ylab("1:300 replicate #2 (log10)") +
  theme_light(base_size = 11) +
  stat_cor(method = "pearson", aes(label = ..r.label..)) 

Dilution 1:1000 replicates

counts.norm.fil %>% 
  ggplot(aes(x = log10(d1_1000_a), y = log10(d1_1000_b))) +
  geom_point(colour = alpha("grey", 0.7)) +
  xlab("1:1000 replicate #1 (log10)") +
  ylab("1:1000 replicate #2 (log10)") +
  theme_light(base_size = 11) +
  stat_cor(method = "pearson", aes(label = ..r.label..)) 

Plot normalized histogram

counts.norm.fil.long %>% 
  ggplot(aes(x = log10(norm))) +
  geom_histogram(aes(y=..density..), binwidth=0.25, colour="black", fill="white") +
  geom_density(alpha = 0.2, fill = "#FF6666") +
  facet_wrap(sampleId~.) +
  theme_light(base_size = 11) +
  ylab("Density") +
  xlab("Normalized reads per oligo (log10)") 


Determine dropout oligo’s

MART1/1D3 vs. Mock

mart1_mock.res = compute_stats(ddsMat.fil, trt = "ID3", ref = "Mock", fitType = "parametric", lfcThreshold = 0.25, alpha = 0.05) %>% 
  left_join(annotations) %>% 
  mutate(log2FoldChange = round(log2FoldChange, 2)) %>% 
  mutate(log2FoldChangeShrunken = round(log2FoldChangeShrunken, 2)) 
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using 'normal' for LFC shrinkage, the Normal prior from Love et al (2014).
## 
## Note that type='apeglm' and type='ashr' have shown to have less bias than type='normal'.
## See ?lfcShrink for more details on shrinkage type, and the DESeq2 vignette.
## Reference: https://doi.org/10.1093/bioinformatics/bty895
## 
## out of 4525 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0.25 (up)    : 0, 0%
## LFC < -0.25 (down) : 3, 0.066%
## outliers [1]       : 0, 0%
## low counts [2]     : 0, 0%
## (mean count < 330)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
## Joining, by = "oligo_id"
# Significant oligos
mart1_mock.res %>% 
  filter(padj < 0.05)
##      oligo_id baseMean log2FoldChange     lfcSE       stat        pvalue
## 1     MART1_b 2308.170          -1.17 0.1123802  -8.208217  1.122482e-16
## 2 MART1_ELA_a 1575.126          -2.59 0.1664753 -14.031382  5.008980e-45
## 3 MART1_ELA_b 1709.922          -4.11 0.1546682 -24.974480 5.789449e-138
##            padj log2FoldChangeShrunken gene_name      gene_id
## 1  1.693077e-13                  -0.57   gene812   gene812 #2
## 2  1.133282e-41                  -0.73 MART1_ELA MART1_ELA #1
## 3 2.619725e-134                  -1.28 MART1_ELA MART1_ELA #2
# Browse
mart1_mock.res %>% 
  select(gene_id, log2FoldChange, padj, gene_name, gene_id) %>% 
  DT::datatable(extensions = 'Buttons', options = list(
    dom = 'Bfrtip',
    buttons = c('copy', 'csv', 'excel', 'pdf', 'print')
  ))
# Plot MA-plot
mart1_mock.maplot = mart1_mock.res %>% 
  ggplot(aes(x = log2(baseMean), y = log2FoldChange, label = oligo_id)) +
  geom_point(colour = alpha("grey", 0.7)) +
  geom_point(data = filter(mart1_mock.res, gene_name %in% c("MART1_ELA")), size = 3, colour = "#efc383") +
  geom_point(data = filter(mart1_mock.res, gene_name %in% c("gene812")), size = 3, colour = "#a6a6a6") +
  geom_point(data = filter(mart1_mock.res, gene_name %in% c("CDK4mut")), size = 3, colour = "#a6a6a6") +
  geom_point(data = filter(mart1_mock.res, gene_name %in% c("CDK4wt")), size = 3, colour = "#a6a6a6") +
  geom_text_repel(data = filter(mart1_mock.res, oligo_id %in% c("MART1_ELA", "MART1", "CDK4mut", "CDK4wt", "gene812")), box.padding = 0.5, max.overlaps = Inf, size = 2.5) +
  theme_classic() +
  theme_classic() +
  xlab("Mean counts (log2)") +
  ylab("Fold change log2(MART1/Mock)")  +
  ggtitle("MART1")
ggplotly(mart1_mock.maplot)

D10

d10_mock.res = compute_stats(ddsMat.fil, trt = "D10", ref = "Mock", fitType = "parametric", lfcThreshold = 0.25, alpha = 0.05) %>% 
  left_join(annotations) %>% 
  mutate(log2FoldChange = round(log2FoldChange, 2)) %>% 
  mutate(log2FoldChangeShrunken = round(log2FoldChangeShrunken, 2)) %>% 
  mutate(gene_name = if_else(gene_name == "RPS3Amut", "SNORDmut", gene_name)) %>% 
  mutate(gene_name = if_else(gene_name == "RPS3Awt", "SNORDwt", gene_name))
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using 'normal' for LFC shrinkage, the Normal prior from Love et al (2014).
## 
## Note that type='apeglm' and type='ashr' have shown to have less bias than type='normal'.
## See ?lfcShrink for more details on shrinkage type, and the DESeq2 vignette.
## Reference: https://doi.org/10.1093/bioinformatics/bty895
## 
## out of 4525 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0.25 (up)    : 0, 0%
## LFC < -0.25 (down) : 5, 0.11%
## outliers [1]       : 0, 0%
## low counts [2]     : 0, 0%
## (mean count < 321)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
## Joining, by = "oligo_id"
# Significant oligos
d10_mock.res %>% 
  filter(padj < 0.05)
##      oligo_id  baseMean log2FoldChange     lfcSE       stat        pvalue
## 1   CDK4mut_a  772.9757          -2.59 0.2186967 -10.712447  4.448893e-27
## 2   CDK4mut_b  596.6486          -4.47 0.2832977 -14.898259  1.691392e-50
## 3     MART1_b 2548.5431          -0.75 0.1163850  -4.313486  8.035024e-06
## 4 MART1_ELA_a 1579.6401          -2.55 0.1577153 -14.601475  1.374162e-48
## 5 MART1_ELA_b 1695.6118          -4.35 0.1653377 -24.768457 9.808023e-136
##            padj log2FoldChangeShrunken gene_name      gene_id
## 1  5.032810e-24                  -0.49   CDK4mut   CDK4mut #1
## 2  3.826774e-47                  -0.50   CDK4mut   CDK4mut #2
## 3  7.271696e-03                  -0.36   gene812   gene812 #2
## 4  2.072695e-45                  -0.81 MART1_ELA MART1_ELA #1
## 5 4.438130e-132                  -1.25 MART1_ELA MART1_ELA #2
# Browse
d10_mock.res %>% 
  select(gene_id, log2FoldChange, padj, gene_name, gene_id) %>% 
  DT::datatable(extensions = 'Buttons', options = list(
    dom = 'Bfrtip',
    buttons = c('copy', 'csv', 'excel', 'pdf', 'print')
  ))
# Plot MA-plot
d10_mock.maplot = d10_mock.res %>% 
  ggplot(aes(x = log2(baseMean), y = log2FoldChange, label = oligo_id)) +
  geom_point(colour = alpha("grey", 0.7)) +
  geom_point(data = filter(d10_mock.res, gene_name %in% c("MART1_ELA")), size = 3, colour = "#efc383") +
  geom_point(data = filter(d10_mock.res, gene_name %in% c("gene812")), size = 3, colour = "#a6a6a6") +
  geom_point(data = filter(d10_mock.res, gene_name %in% c("CDK4mut")), size = 3, colour = "#a6a6a6") +
  geom_point(data = filter(d10_mock.res, gene_name %in% c("CDK4wt")), size = 3, colour = "#a6a6a6") +
  geom_text_repel(data = filter(d10_mock.res, oligo_id %in% c("MART1_ELA", "MART1", "CDK4mut", "CDK4wt", "gene812")), box.padding = 0.5, max.overlaps = Inf, size = 2.5) +
  theme_classic() +
  theme_classic() +
  xlab("Mean counts (log2)") +
  ylab("Fold change log2(D10/Mock)") 
ggplotly(d10_mock.maplot)

D100

d100_mock.res = compute_stats(ddsMat.fil, trt = "D100", ref = "Mock", fitType = "parametric", lfcThreshold = 0.25, alpha = 0.05) %>% 
  left_join(annotations) %>% 
  mutate(log2FoldChange = round(log2FoldChange, 2)) %>% 
  mutate(log2FoldChangeShrunken = round(log2FoldChangeShrunken, 2)) %>% 
  mutate(gene_name = if_else(gene_name == "RPS3Amut", "SNORDmut", gene_name)) %>% 
  mutate(gene_name = if_else(gene_name == "RPS3Awt", "SNORDwt", gene_name))
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## using 'normal' for LFC shrinkage, the Normal prior from Love et al (2014).
## 
## Note that type='apeglm' and type='ashr' have shown to have less bias than type='normal'.
## See ?lfcShrink for more details on shrinkage type, and the DESeq2 vignette.
## Reference: https://doi.org/10.1093/bioinformatics/bty895
## 
## out of 4525 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0.25 (up)    : 0, 0%
## LFC < -0.25 (down) : 4, 0.088%
## outliers [1]       : 0, 0%
## low counts [2]     : 0, 0%
## (mean count < 311)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
## Joining, by = "oligo_id"
# Significant oligos
d100_mock.res %>% 
  filter(padj < 0.05) 
##      oligo_id  baseMean log2FoldChange     lfcSE       stat        pvalue
## 1   CDK4mut_a  813.4943          -2.14 0.1875386 -10.093039  2.965222e-24
## 2   CDK4mut_b  690.1380          -2.26 0.2605368  -7.721243  5.760017e-15
## 3 MART1_ELA_a 1642.6568          -2.20 0.1250384 -15.620545  2.637637e-55
## 4 MART1_ELA_b 1871.9163          -2.66 0.1137217 -21.188122 6.145161e-100
##           padj log2FoldChangeShrunken gene_name      gene_id
## 1 4.472543e-21                  -0.53   CDK4mut   CDK4mut #1
## 2 6.516020e-12                  -0.32   CDK4mut   CDK4mut #2
## 3 5.967654e-52                  -0.96 MART1_ELA MART1_ELA #1
## 4 2.780685e-96                  -1.28 MART1_ELA MART1_ELA #2
# Browse
d100_mock.res %>% 
  select(gene_id, log2FoldChange, padj, gene_name, gene_id) %>% 
  DT::datatable(extensions = 'Buttons', options = list(
    dom = 'Bfrtip',
    buttons = c('copy', 'csv', 'excel', 'pdf', 'print')
  ))
# Plot MA-plot
d100_mock.maplot = d100_mock.res %>% 
  ggplot(aes(x = log2(baseMean), y = log2FoldChange, label = oligo_id)) +
  geom_point(colour = alpha("grey", 0.7)) +
  geom_point(data = filter(d100_mock.res, gene_name %in% c("MART1_ELA")), size = 3, colour = "#efc383") +
  geom_point(data = filter(d100_mock.res, gene_name %in% c("gene812")), size = 3, colour = "#a6a6a6") +
  geom_point(data = filter(d100_mock.res, gene_name %in% c("CDK4mut")), size = 3, colour = "#a6a6a6") +
  geom_point(data = filter(d100_mock.res, gene_name %in% c("CDK4wt")), size = 3, colour = "#a6a6a6") +
  geom_text_repel(data = filter(d100_mock.res, oligo_id %in% c("MART1_ELA", "MART1", "CDK4mut", "CDK4wt", "gene812")), box.padding = 0.5, max.overlaps = Inf, size = 2.5) +
  theme_classic() +
  theme_classic() +
  xlab("Mean counts (log2)") +
  ylab("Fold change log2(D100/Mock)") 
ggplotly(d100_mock.maplot)

D300

d300_mock.res = compute_stats(ddsMat.fil, trt = "D300", ref = "Mock", fitType = "parametric", lfcThreshold = 0.25, alpha = 0.05) %>% 
  left_join(annotations) %>% 
  mutate(log2FoldChange = round(log2FoldChange, 2)) %>% 
  mutate(log2FoldChangeShrunken = round(log2FoldChangeShrunken, 2)) %>% 
  mutate(gene_name = if_else(gene_name == "RPS3Amut", "SNORDmut", gene_name)) %>% 
  mutate(gene_name = if_else(gene_name == "RPS3Awt", "SNORDwt", gene_name))
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## using 'normal' for LFC shrinkage, the Normal prior from Love et al (2014).
## 
## Note that type='apeglm' and type='ashr' have shown to have less bias than type='normal'.
## See ?lfcShrink for more details on shrinkage type, and the DESeq2 vignette.
## Reference: https://doi.org/10.1093/bioinformatics/bty895
## 
## out of 4525 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0.25 (up)    : 0, 0%
## LFC < -0.25 (down) : 2, 0.044%
## outliers [1]       : 0, 0%
## low counts [2]     : 0, 0%
## (mean count < 338)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
## Joining, by = "oligo_id"
# Significant oligos
d300_mock.res %>% 
  filter(padj < 0.05) 
##      oligo_id baseMean log2FoldChange     lfcSE      stat       pvalue
## 1 MART1_ELA_a 1993.798          -1.07 0.1143751 -7.143311 4.555459e-13
## 2 MART1_ELA_b 2299.487          -1.24 0.1001225 -9.906465 1.951041e-23
##           padj log2FoldChangeShrunken gene_name      gene_id
## 1 1.030673e-09                  -0.52 MART1_ELA MART1_ELA #1
## 2 8.828460e-20                  -0.69 MART1_ELA MART1_ELA #2
# Browse
d300_mock.res %>% 
  select(gene_id, log2FoldChange, padj, gene_name, gene_id) %>% 
  DT::datatable(extensions = 'Buttons', options = list(
    dom = 'Bfrtip',
    buttons = c('copy', 'csv', 'excel', 'pdf', 'print')
  ))
# Plot MA-plot
d300_mock.maplot = d300_mock.res %>% 
  ggplot(aes(x = log2(baseMean), y = log2FoldChange, label = oligo_id)) +
  geom_point(colour = alpha("grey", 0.7)) +
  geom_point(data = filter(d300_mock.res, gene_name %in% c("MART1_ELA")), size = 3, colour = "#efc383") +
  geom_point(data = filter(d300_mock.res, gene_name %in% c("gene812")), size = 3, colour = "#a6a6a6") +
  geom_point(data = filter(d300_mock.res, gene_name %in% c("CDK4mut")), size = 3, colour = "#a6a6a6") +
  geom_point(data = filter(d300_mock.res, gene_name %in% c("CDK4wt")), size = 3, colour = "#a6a6a6") +
  geom_text_repel(data = filter(d300_mock.res, oligo_id %in% c("MART1_ELA", "MART1", "CDK4mut", "CDK4wt", "gene812")), box.padding = 0.5, max.overlaps = Inf, size = 2.5) +
  theme_classic() +
  theme_classic() +
  xlab("Mean counts (log2)") +
  ylab("Fold change log2(D300/Mock)") 
ggplotly(d300_mock.maplot)

D1000

d1000_mock.res = compute_stats(ddsMat.fil, trt = "D1000", ref = "Mock", fitType = "parametric", lfcThreshold = 0.25, alpha = 0.05) %>% 
  left_join(annotations) %>% 
  mutate(log2FoldChange = round(log2FoldChange, 2)) %>% 
  mutate(log2FoldChangeShrunken = round(log2FoldChangeShrunken, 2)) %>% 
  mutate(gene_name = if_else(gene_name == "RPS3Amut", "SNORDmut", gene_name)) %>% 
  mutate(gene_name = if_else(gene_name == "RPS3Awt", "SNORDwt", gene_name))
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using 'normal' for LFC shrinkage, the Normal prior from Love et al (2014).
## 
## Note that type='apeglm' and type='ashr' have shown to have less bias than type='normal'.
## See ?lfcShrink for more details on shrinkage type, and the DESeq2 vignette.
## Reference: https://doi.org/10.1093/bioinformatics/bty895
## 
## out of 4525 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0.25 (up)    : 0, 0%
## LFC < -0.25 (down) : 0, 0%
## outliers [1]       : 0, 0%
## low counts [2]     : 0, 0%
## (mean count < 340)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
## Joining, by = "oligo_id"
# Significant oligos
d1000_mock.res %>% 
  filter(padj < 0.05) 
##  [1] oligo_id               baseMean               log2FoldChange        
##  [4] lfcSE                  stat                   pvalue                
##  [7] padj                   log2FoldChangeShrunken gene_name             
## [10] gene_id               
## <0 rows> (or 0-length row.names)
# Browse
d1000_mock.res %>% 
  select(gene_id, log2FoldChange, padj, gene_name, gene_id) %>% 
  DT::datatable(extensions = 'Buttons', options = list(
    dom = 'Bfrtip',
    buttons = c('copy', 'csv', 'excel', 'pdf', 'print')
  ))
# Plot MA-plot
d1000_mock.maplot = d1000_mock.res %>% 
  ggplot(aes(x = log2(baseMean), y = log2FoldChange, label = oligo_id)) +
  geom_point(colour = alpha("grey", 0.7)) +
  geom_point(colour = alpha("grey", 0.7)) +
  geom_point(data = filter(d1000_mock.res, gene_name %in% c("MART1_ELA")), size = 3, colour = "#efc383") +
  geom_point(data = filter(d1000_mock.res, gene_name %in% c("gene812")), size = 3, colour = "#a6a6a6") +
  geom_point(data = filter(d1000_mock.res, gene_name %in% c("CDK4mut")), size = 3, colour = "#a6a6a6") +
  geom_point(data = filter(d1000_mock.res, gene_name %in% c("CDK4wt")), size = 3, colour = "#a6a6a6") +
  geom_text_repel(data = filter(d1000_mock.res, oligo_id %in% c("MART1_ELA", "MART1", "CDK4mut", "CDK4wt", "gene812")), box.padding = 0.5, max.overlaps = Inf, size = 2.5) +
  theme_classic() +
  xlab("Mean counts (log2)") +
  ylab("Fold change log2(D1000/Mock)") 
ggplotly(d1000_mock.maplot)

CDK4#53 vs. Mock

cdk454_mock.res = compute_stats(ddsMat.fil, trt = "CDK4", ref = "Mock", fitType = "parametric", lfcThreshold = 0.25, alpha = 0.05) %>% 
  left_join(annotations) %>% 
  mutate(log2FoldChange = round(log2FoldChange, 2)) %>% 
  mutate(log2FoldChangeShrunken = round(log2FoldChangeShrunken, 2)) 
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## using 'normal' for LFC shrinkage, the Normal prior from Love et al (2014).
## 
## Note that type='apeglm' and type='ashr' have shown to have less bias than type='normal'.
## See ?lfcShrink for more details on shrinkage type, and the DESeq2 vignette.
## Reference: https://doi.org/10.1093/bioinformatics/bty895
## 
## out of 4525 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0.25 (up)    : 0, 0%
## LFC < -0.25 (down) : 3, 0.066%
## outliers [1]       : 0, 0%
## low counts [2]     : 0, 0%
## (mean count < 327)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
## Joining, by = "oligo_id"
# Significant oligos
cdk454_mock.res %>% 
  filter(padj < 0.05)
##     oligo_id  baseMean log2FoldChange     lfcSE       stat       pvalue
## 1  CDK4mut_a  747.0887          -2.98 0.1915627 -14.255008 2.086284e-46
## 2  CDK4mut_b  597.9877          -4.40 0.5844750  -7.094939 6.470425e-13
## 3 oligo_4374 1881.8200          -0.67 0.1050117  -4.025151 2.846933e-05
##           padj log2FoldChangeShrunken gene_name     gene_id
## 1 9.440434e-43                  -0.73   CDK4mut  CDK4mut #1
## 2 1.463934e-09                  -0.10   CDK4mut  CDK4mut #2
## 3 4.294124e-02                  -0.36  gene2105 gene2105 #2
# Browse
cdk454_mock.res %>% 
  select(gene_id, log2FoldChange, padj, gene_name, gene_id) %>% 
  DT::datatable(extensions = 'Buttons', options = list(
    dom = 'Bfrtip',
    buttons = c('copy', 'csv', 'excel', 'pdf', 'print')
  ))
# Plot MA-plot
cdk453_mock.maplot = cdk454_mock.res %>% 
  ggplot(aes(x = log2(baseMean), y = log2FoldChange, label = oligo_id)) +
  geom_point(colour = alpha("grey", 0.7)) +
  geom_point(colour = alpha("red", 0.7), data = . %>% filter(oligo_id %in% c("MART1_ELA_a", "MART1_ELA_b"))) +
  geom_point(colour = alpha("lightblue", 0.7), data = . %>% filter(oligo_id %in% c("MART1_a", "MART1_b"))) +
  geom_point(colour = alpha("orange", 0.7), data = . %>% filter(oligo_id %in% c("CDK4wt_a", "CDK4wt_b"))) +
  geom_point(colour = alpha("darkgreen", 0.7), data = . %>% filter(oligo_id %in% c("CDK4mut_a", "CDK4mut_b"))) +
  geom_text_repel(data = filter(cdk454_mock.res, oligo_id %in% c("CDK4wt_a", "CDK4wt_b", "CDK4mut_a", "CDK4mut_b")), box.padding = 0.5, max.overlaps = Inf, size = 2.5) +
  theme_classic() +
  theme_classic() +
  xlab("Mean counts (log2)") +
  ylab("Fold change log2(CDK4 #53/Mock)")  +
  ggtitle("CDK4 #53")
ggplotly(cdk453_mock.maplot)

Session info

sessionInfo()
## R version 4.1.3 (2022-03-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.5 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices datasets  utils     methods  
## [8] base     
## 
## other attached packages:
##  [1] forcats_0.5.1               stringr_1.4.0              
##  [3] dplyr_1.0.9                 purrr_0.3.4                
##  [5] readr_2.1.2                 tidyr_1.2.0                
##  [7] tibble_3.1.7                tidyverse_1.3.1            
##  [9] ggpubr_0.4.0                plotly_4.10.0              
## [11] ggplotify_0.1.0             readxl_1.4.0               
## [13] DESeq2_1.34.0               SummarizedExperiment_1.24.0
## [15] Biobase_2.54.0              MatrixGenerics_1.6.0       
## [17] matrixStats_0.62.0          GenomicRanges_1.46.1       
## [19] GenomeInfoDb_1.30.1         IRanges_2.28.0             
## [21] S4Vectors_0.32.4            BiocGenerics_0.40.0        
## [23] ggrepel_0.9.1               ggplot2_3.3.6              
## [25] ggsci_2.9                   patchwork_1.1.1            
## [27] knitr_1.39                 
## 
## loaded via a namespace (and not attached):
##   [1] colorspace_2.0-3       ggsignif_0.6.3         ellipsis_0.3.2        
##   [4] XVector_0.34.0         fs_1.5.2               rstudioapi_0.13       
##   [7] farver_2.1.0           DT_0.25                bit64_4.0.5           
##  [10] AnnotationDbi_1.56.2   fansi_1.0.3            lubridate_1.8.0       
##  [13] xml2_1.3.3             splines_4.1.3          cachem_1.0.6          
##  [16] geneplotter_1.72.0     jsonlite_1.8.0         broom_0.8.0           
##  [19] annotate_1.72.0        dbplyr_2.1.1           png_0.1-7             
##  [22] BiocManager_1.30.18    compiler_4.1.3         httr_1.4.2            
##  [25] backports_1.4.1        assertthat_0.2.1       Matrix_1.4-0          
##  [28] fastmap_1.1.0          lazyeval_0.2.2         cli_3.3.0             
##  [31] htmltools_0.5.2        tools_4.1.3            gtable_0.3.0          
##  [34] glue_1.6.2             GenomeInfoDbData_1.2.7 Rcpp_1.0.8.3          
##  [37] carData_3.0-5          cellranger_1.1.0       jquerylib_0.1.4       
##  [40] vctrs_0.4.1            Biostrings_2.62.0      crosstalk_1.2.0       
##  [43] xfun_0.30              rvest_1.0.2            lifecycle_1.0.1       
##  [46] renv_0.15.4            rstatix_0.7.0          XML_3.99-0.10         
##  [49] zlibbioc_1.40.0        scales_1.2.0           hms_1.1.1             
##  [52] parallel_4.1.3         RColorBrewer_1.1-3     yaml_2.3.5            
##  [55] memoise_2.0.1          yulab.utils_0.0.5      sass_0.4.1            
##  [58] stringi_1.7.6          RSQLite_2.2.15         highr_0.9             
##  [61] genefilter_1.76.0      BiocParallel_1.28.3    rlang_1.0.2           
##  [64] pkgconfig_2.0.3        bitops_1.0-7           evaluate_0.15         
##  [67] lattice_0.20-45        labeling_0.4.2         htmlwidgets_1.5.4     
##  [70] bit_4.0.4              tidyselect_1.1.2       magrittr_2.0.3        
##  [73] R6_2.5.1               generics_0.1.2         DelayedArray_0.20.0   
##  [76] DBI_1.1.2              pillar_1.7.0           haven_2.5.0           
##  [79] withr_2.5.0            survival_3.2-13        KEGGREST_1.34.0       
##  [82] abind_1.4-5            RCurl_1.98-1.6         modelr_0.1.8          
##  [85] crayon_1.5.1           car_3.0-13             utf8_1.2.2            
##  [88] tzdb_0.3.0             rmarkdown_2.14         locfit_1.5-9.6        
##  [91] grid_4.1.3             data.table_1.14.2      blob_1.2.3            
##  [94] reprex_2.0.1           digest_0.6.29          xtable_1.8-4          
##  [97] gridGraphics_0.5-1     munsell_0.5.0          viridisLite_0.4.0     
## [100] bslib_0.3.1